R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load the `uscomp` data set and take a first look at it.
# The workspace is deliberately NOT cleared here: `rm(list = ls())` would
# silently delete every object the reader already has in their session.
# NOTE(review): data(uscomp) relies on the package that ships `uscomp` being
# attached earlier in the document -- confirm against the setup chunk.

data(uscomp)
?uscomp
## starting httpd help server ... done
head(uscomp)
##                              Assets Sales Market Value Profits Cash Flow
## Bell_Atlantic                 19788  9084        10636  1092.9    2576.8
## Continental_Telecom            5074  2557         1892   239.9     578.3
## American_Electric_Power       13621  4848         4572   485.0     898.9
## Brooklyn_Union_Gas             1117  1038          478    59.7      91.7
## Central_Illinois_Publ._Serv.   1633   701          679    74.3     135.9
## Cleveland_Electric_Illum.      5651  1254         2002   310.7     407.9
##                              Employees        Sector
## Bell_Atlantic                     79.4 Communication
## Continental_Telecom               21.9 Communication
## American_Electric_Power           23.4        Energy
## Brooklyn_Union_Gas                 3.8        Energy
## Central_Illinois_Publ._Serv.       2.8        Energy
## Cleveland_Electric_Illum.          6.2        Energy
# Make sure the Sales column is stored as plain numbers before summarising.
# The conversion must read from Sales itself: reading from Assets (as the
# previous version did) overwrites every sales figure with the asset value.
# Going through as.character() keeps the printed values even if the column
# was loaded as a factor or a character vector.
# NOTE(review): the rendered summary shown below predates this fix (its Sales
# column repeats the Assets figures) -- re-knit the document to refresh it.
uscomp$Sales <- as.numeric(as.character(uscomp$Sales))
summary(uscomp)
##      Assets          Sales        Market Value        Profits      
##  Min.   :  223   Min.   :  223   Min.   :   53.0   Min.   :-771.5  
##  1st Qu.: 1122   1st Qu.: 1122   1st Qu.:  512.5   1st Qu.:  39.0  
##  Median : 2788   Median : 2788   Median :  944.0   Median :  70.5  
##  Mean   : 5941   Mean   : 5941   Mean   : 3269.1   Mean   : 209.8  
##  3rd Qu.: 5802   3rd Qu.: 5802   3rd Qu.: 1961.5   3rd Qu.: 188.1  
##  Max.   :52634   Max.   :52634   Max.   :95697.0   Max.   :6555.0  
##                                                                    
##    Cash Flow         Employees                Sector  
##  Min.   :-651.90   Min.   :  0.60   Finance      :17  
##  1st Qu.:  75.15   1st Qu.:  3.95   Energy       :15  
##  Median : 133.30   Median : 15.40   Manufacturing:10  
##  Mean   : 400.93   Mean   : 37.60   Retail       :10  
##  3rd Qu.: 328.85   3rd Qu.: 48.50   HiTech       : 8  
##  Max.   :9874.00   Max.   :400.20   Other        : 7  
##                                     (Other)      :12

Including Plots

You can also embed plots, for example:

# Correlation plot of the six numeric variables (columns 1 to 6; column 7,
# Sector, is categorical and is left out).
mat_num <- as.matrix(uscomp[, 1:6])

corrplot(
  cor(mat_num),
  # what is drawn
  method = "shade",
  type = "upper",
  is.corr = TRUE,
  # title and background
  title = "Correlation matrix between numerical variables",
  bg = "blue",
  # text labels
  tl.col = "black",
  tl.cex = 0.9,
  tl.srt = 15,
  # colour legend
  cl.cex = 0.8
)

Note that adding the echo = FALSE parameter to a code chunk prevents printing of the R code that generated the plot.

# Draw a stratified random sample of the companies: roughly 2/5 of each
# sector, so the smaller data set keeps the sector mix of the full one.
#
# all_companies : number of companies per sector (counts are already whole
#                 numbers, so no rounding is needed)
# sectors       : number of companies to draw from each sector
# smaller_data  : the sampled rows of `uscomp`, original row names preserved
#
# NOTE(review): no seed is set in the visible code, so the sample changes on
# every run unless set.seed() is called earlier in the document.
all_companies <- table(uscomp$Sector)

sectors <- round(all_companies * 2 / 5, 0)

# One data frame per sector, collected in a list and bound once at the end
# instead of growing a data frame with rbind() on every iteration.
# seq_along()/seq_len() stay empty for zero-length input, unlike 1:n.
sampled_by_sector <- lapply(seq_along(sectors), function(i) {
  # which() drops rows whose Sector is NA, matching what subset() would keep.
  in_sector <- which(uscomp$Sector == names(sectors)[i])
  sector_rows <- uscomp[in_sector, , drop = FALSE]
  picked <- sample(seq_len(all_companies[[i]]), sectors[[i]])
  sector_rows[picked, , drop = FALSE]
})

smaller_data <- do.call(rbind, sampled_by_sector)

# Heat map of the six numeric columns for the sampled companies
# (one row per company, one column per variable).
mat <- smaller_data[1:6]

heatmaply(mat,
          dendrogram = "none",
          xlab = "Numerical variables", ylab = "Companies",
          main = "HeatMap",
          # The variables are on very different scales, so scale per column.
          scale = "column",
          margins = c(60, 100, 40, 20),
          grid_color = "white",
          grid_width = 0.00001,
          titleX = TRUE,
          hide_colorbar = TRUE,
          branches_lwd = 0.1,
          label_names = c("Company", "Feature:", "Value"),
          fontsize_row = 5, fontsize_col = 5,
          labCol = colnames(mat),
          labRow = rownames(mat),
          # No trailing comma after the last argument: it would pass an
          # empty argument on to heatmaply().
          heatmap_layers = theme(axis.line = element_blank())
)